In [1]:
# Prepare the working directory (the `!` prefix runs each line as a shell command):
#  - touch creates citation_scripts/__init__.py if it is missing, so that
#    `citation_scripts` can be imported as a package
#  - cp places a copy of apidois.json in the current directory, where it is
#    later opened by its bare file name
!touch citation_scripts/__init__.py
!cp citation_scripts/apidois.json .
In [1]:
from citation_scripts import api_utilities
import json
import os
import sys
from IPython.display import clear_output
Get the list of DOIs from the provided JSON file:
In [2]:
# Read the DOI collection from apidois.json in the working directory and keep
# it as a sorted list, so every DOI has a fixed position in `dois`.
with open("apidois.json", 'r') as doi_file:
    loaded_dois = json.load(doi_file)
dois = sorted(loaded_dois)
For each DOI, use the rich citations API to download the full citation data, and save each result to its own JSON file in the `citation_data/` folder:
In [3]:
# Download the citation record for every DOI and cache each one on disk as
# citation_data/doi_<index>.json. The loop is resumable: DOIs whose file
# already exists are skipped, so re-running the cell only fetches what is missing.
# NOTE(review): files are keyed by the DOI's position in the sorted `dois` list,
# so an existing cache only lines up while the DOI list is unchanged -- confirm
# before reusing old files with a modified apidois.json.

# make sure the output folder exists; otherwise the first open() below fails
if not os.path.isdir("citation_data"):
    os.makedirs("citation_data")

for i, doi in enumerate(dois):
    pth = "citation_data/doi_{:05d}.json".format(i)

    # skip DOIs that we have already fetched
    if os.path.exists(pth):
        continue

    # print out progress; wait=True postpones the clearing until the new line
    # is ready, which avoids flicker in the output area
    clear_output(wait=True)
    print("{} --> {}".format(doi, pth))
    sys.stdout.flush()

    # download the citation
    citation = api_utilities.citations(doi)

    # Save via a temporary file and rename it into place only once the dump has
    # finished. An interrupted or failed write therefore never leaves a
    # truncated doi_*.json behind that the exists() check above would skip.
    tmp_pth = pth + ".tmp"
    with open(tmp_pth, "w") as fh:
        json.dump(citation, fh)
    os.rename(tmp_pth, pth)